In [1]:
import datetime
from calendar import monthrange
from dateutil.relativedelta import relativedelta

import numpy as np
import pandas as pd
from pivottablejs import pivot_ui
from IPython.core.display import HTML
import plotly.express as px
import plotly.graph_objects as go
from plotly.subplots import make_subplots
import plotly.io as pio
pio.templates.default = 'simple_white'


## Jupyter widgets + display
from ipywidgets import interact, interactive, fixed, interact_manual,Layout
import ipywidgets as widgets
from IPython.core.display import HTML
from IPython.display import Image
import ipyplot
In [2]:
base_price = pd.read_excel('Cloud invoicing.xlsx', sheet_name='Cluster prices')
log_activity = pd.read_excel('Cloud invoicing.xlsx', sheet_name='System output')
log_activity = log_activity.sort_values(by=['Customer', 'Transactions date'])

Invoicing pivot


Following assumption is made:

  • The first record of a user is his first usage date.
  • There are 4 cluster only. (We can actually make this file more dynamic in the sense that it can take care additional cluster without us adjusting the code. However, as a POC I will make this assumption).

What can be extracted from the pivot: Per month per client

  • Number of days Cluster x was used (Cluster #x usage in days)
  • Cost of usage of Cluster x (Cluster #x cost)
  • Total billing amount for 4 clusters (total_billing_amount)
In [3]:
first_date = log_activity['Transactions date'].min().replace(day=1)
last_date = max(log_activity['Transactions date'].max().replace(day=1),datetime.date.today())
In [4]:
list_billing_months = pd.date_range(first_date, last_date, freq = 'MS').tolist()
In [5]:
list_customers = log_activity['Customer'].unique()
In [6]:
all_customer_df = pd.DataFrame()
for c in list_customers:
    end_of_month = pd.Period(last_date,freq='M').end_time
    idx = pd.date_range(first_date,end_of_month)
    df = pd.DataFrame(log_activity[log_activity['Customer'] == c][['Transactions date','Cluster #1', 'Cluster #2','Cluster #3','Cluster #4']])
    df = df.set_index('Transactions date')
    df = df.reindex(idx, method='ffill')
    df = df.fillna(0)
    df['Customer'] = c
    all_customer_df = pd.concat([all_customer_df, df])
all_customer_df.index = pd.to_datetime(all_customer_df.index)
In [7]:
billing_df = pd.DataFrame()
for m in list_billing_months:
    end_of_month = pd.Period(m,freq='M').end_time
    num_days = monthrange(m.year, m.month)[1]
    x = all_customer_df.loc[(all_customer_df.index >= m) & (all_customer_df.index <= end_of_month)]
    x1 = x.groupby(['Customer']).sum()
    x1['billing_date'] = end_of_month.date()
    x1['day_in_month'] = num_days
    billing_df = pd.concat([x1, billing_df])
In [8]:
billing_df['Cluster #1 cost'] = billing_df['Cluster #1']/billing_df['day_in_month'] * base_price[base_price['Cluster']=="Cluster #1"]['Cluster price / month'].values[0]
billing_df['Cluster #2 cost'] = billing_df['Cluster #2']/billing_df['day_in_month'] * base_price[base_price['Cluster']=="Cluster #2"]['Cluster price / month'].values[0]
billing_df['Cluster #3 cost'] = billing_df['Cluster #3']/billing_df['day_in_month'] * base_price[base_price['Cluster']=="Cluster #3"]['Cluster price / month'].values[0]
billing_df['Cluster #4 cost'] = billing_df['Cluster #4']/billing_df['day_in_month'] * base_price[base_price['Cluster']=="Cluster #4"]['Cluster price / month'].values[0]
In [9]:
billing_df['total_billing_amount'] = billing_df['Cluster #1 cost'] + billing_df['Cluster #2 cost'] + billing_df['Cluster #3 cost'] + billing_df['Cluster #4 cost']
In [10]:
billing_df = billing_df.rename(columns={"Cluster #1": "Cluster #1 usage in days",
                        "Cluster #2": "Cluster #2 usage in days",
                        "Cluster #3": "Cluster #3 usage in days",
                        "Cluster #4": "Cluster #4 usage in days",})
In [11]:
final = billing_df.drop('day_in_month', axis=1)
In [12]:
final = final.round({'Cluster #1 cost': 2,
                     'Cluster #2 cost': 2,
                     'Cluster #3 cost': 2,
                     'Cluster #4 cost': 2,
                     'billing_amount': 2})
final = final.sort_values(by=['billing_date', 'Customer'])
pivot_ui(final,outfile_path='invoice_pivot.html')
#HTML('invoice_pivot.html')
Out[12]:

YoY comparison


For YoY comparison, we can use monthly , quarterly, yearly or any period that makes sense for the business. In addition, depending on the purpose of the report, we also can have the YoY per client or per client's industry/ sector, etc.

In this challenge, as a proof of concept I will chose monthly and yearly to illustrate how the interactive chart would look like. There is no addition information regarding the customer hence we will have the YoY per client.

In each chart you can zoom in/out, select/de-select the customer using the label on the right side.

Monthly cost YoY

In [13]:
yoy_df = final.copy()
yoy_df = yoy_df.reset_index().set_index('billing_date')
yoy_df.index = pd.to_datetime(yoy_df.index)
In [14]:
monthly_df = yoy_df.copy()
monthly_df['change'] = monthly_df.groupby([monthly_df.index.month,'Customer'])['total_billing_amount'].\
                                            pct_change(fill_method='ffill')*100
monthly_df = monthly_df.reset_index()
In [15]:
fig = px.line(
    monthly_df[monthly_df.change.notnull()],
    x = 'billing_date',
    y = 'change',
    color = 'Customer',
    title = "Monthly billing amount YoY changes in percentage per customer",
    text = 'change',
    labels=dict(billing_date="Month", change="YoY change in monthly billing amount (%)")

)
fig.update_traces(texttemplate='%{text:.3s}%')
fig.show(renderer="notebook")

Yearly cost YoY

In [16]:
pre_year = yoy_df.copy()
In [17]:
pre_year = pd.DataFrame(pre_year.groupby([pre_year.index.year, 'Customer'])['total_billing_amount'].sum())
pre_year['change']=pre_year.groupby(['Customer'])['total_billing_amount'].pct_change()*100
pre_year = pre_year.reset_index()
In [18]:
fig = px.line(
    pre_year[pre_year['change'].notnull()],
    x = 'billing_date',
    y = 'change',
    color = 'Customer',
    category_orders={'billing_date':list(pre_year['billing_date'])},
    title = "Monthly billing amount YoY changes in percentage per customer",
    text = 'change',
    labels=dict(billing_date="Year", change="YoY hange in yearly billing amount (%)")

)
fig.update_traces(texttemplate='%{text:.3s}%')
fig.update_xaxes(type='category')
fig.show(renderer="notebook")
In [19]:
HTML('''<script>
code_show=true; 
function code_toggle() {
 if (code_show){
 $('div.input').hide();
 } else {
 $('div.input').show()
 }
 code_show = !code_show
} 
$( document ).ready(code_toggle);
</script>
<form action="javascript:code_toggle()"><input type="submit" value="Show code"></form>''')
Out[19]:
In [ ]: